* Examine recall errors in HILDA data

* Folder locations used throughout: HILDA release files, working data, and log output
local hildadir `"H:\Documents\HILDA Project\Data\HILDA Release\Release 16\Stata 160c"'
local workdir  `"H:\Documents\HILDA Project\Data\HILDA Working\Release 16\residential mobility"'
local logdir   `"H:\Documents\HILDA Project\ARC Methodology\UQ research\Chapter 3 - Mobility\Results"'

* Close any log still open from an earlier run (no error if none is open), then start a fresh text log
capture log close
log using `"`logdir'\log_recall_errors.log"', replace

* Load the long-format mobility file and attach the interview pattern (ivwptn) from the master file
use "`workdir'\move_long", clear

* add interview pattern
* The merge key is a 7-character zero-padded string copy of the numeric person id, built only for this merge
rename xwaveid xwaveidn
tostring xwaveidn, gen(xwaveid) format(%07.0f)

* keep(master match): records that exist only in the master file are not added to the data.
* Such rows would carry missing xwaveid and wave, so xwaveid wave would no longer identify rows uniquely
* (needed by the later merge 1:1 on xwaveid wave and by the panel lag operators).
merge m:1 xwaveid using "`hildadir'\master_p160c", keepusing(ivwptn) keep(master match) nogenerate

* Drop the string key and put the numeric id back under its original name
drop xwaveid
rename xwaveidn xwaveid

* define recall error type
* error compares pmove5 with move12m5:
*   0 = both indicators agree
*   1 = pmove5==1 while move12m5==0
*   2 = pmove5==0 while move12m5==1
* error stays missing for any other combination, including when either indicator is missing
tab move12m5 pmove5 if wave==11

* Stata evaluates . == . as true, so agreement is only accepted when both indicators are observed;
* otherwise rows with both indicators missing would be coded as consistent
gen error=0 if pmove5==move12m5 & !missing(pmove5, move12m5)
replace error=1 if pmove5==1 & move12m5==0
replace error=2 if pmove5==0 & move12m5==1
label variable error "Recall error"
label define err 0 "Consistent" 1 "Not report move but move" 2 "Reported move but no move"
label values error err

* own: 1 for hstenr codes 1 and 3, 0 for codes 2 and 4, missing otherwise (original note: 26 missing as DK/Ref)
gen own = cond(inlist(hstenr, 1, 3), 1, cond(inlist(hstenr, 2, 4), 0, .))

* bach: bachelor & above (edhigh1 1-3) versus edhigh1 4-10; the 0 group runs up to code 10, so unknown is treated as not bachelor
gen bach = cond(inrange(edhigh1, 1, 3), 1, cond(inrange(edhigh1, 4, 10), 0, .))

* ed: highest level of education in three groups
*   1 = Bachelor and above (edhigh1 1-3)
*   2 = Advanced Diploma/Diploma, Certificates III/IV, Year 12 (edhigh1 4, 5, 8)
*   3 = Year 11 and below (edhigh1 9; includes codes 6 and 7 of edhigh before it was recoded to edhigh1)
gen ed = cond(inrange(edhigh1, 1, 3), 1, cond(inlist(edhigh1, 4, 5, 8), 2, cond(edhigh1==9, 3, .)))
tabulate ed edhigh1, missing

* country of birth: mes flags anbcob==2 and nmes flags anbcob==3, each against the other two of codes 1-3;
* both are missing when anbcob is outside 1-3
gen mes  = cond(anbcob==2, 1, cond(inlist(anbcob, 1, 3), 0, .))
gen nmes = cond(anbcob==3, 1, cond(inlist(anbcob, 1, 2), 0, .))
* interview date: hhidate is read as a string with day in characters 1-2, month in 4-5 and year in 7-10
tempvar dd mm yyyy
gen double `dd'   = real(substr(hhidate,1,2))
gen double `mm'   = real(substr(hhidate,4,2))
gen double `yyyy' = real(substr(hhidate,7,4))
gen idate = mdy(`mm', `dd', `yyyy')
drop `dd' `mm' `yyyy'

* interview date of the same person five waves earlier (uses the panel lag operator)
sort xwaveid wave
gen idate_m5 = L5.idate

* duration in days between the current interview and the interview five waves earlier, and its square
gen dur5i = idate - idate_m5 if !missing(idate, idate_m5)
gen dur5i2 = dur5i*dur5i
summarize dur5i

* distance in days from the 5-year anniversary (5 x 365 days) of the interview five waves earlier, and its square
gen anniv5d = dur5i - 365*5
gen anniv5d2 = anniv5d*anniv5d
summarize anniv5d

* Age categories: one dummy (agecat1, agecat2, ...) per distinct value of ageg
tabulate ageg, generate(agecat)

* Alternative age categories: age00 ... age80 each pool two consecutive ageg codes
* (age00 = codes 1-2, age10 = codes 3-4, ..., age80 = codes 17-18); 0 when ageg is missing
forvalues d = 0/8 {
  local lo = 2*`d' + 1
  local hi = `lo' + 1
  gen age`d'0 = inlist(ageg, `lo', `hi')
}

* Broader bands pooling four consecutive ageg codes
gen age30_40 = inlist(ageg, 7, 8, 9, 10)
gen age50_60 = inlist(ageg, 11, 12, 13, 14)
gen age70_80 = inlist(ageg, 15, 16, 17, 18)

* interactions with age: agecat5-agecat18 times female (fagecat*) and times bach (bagecat*)
local i 5
while `i' <= 18 {
  gen fagecat`i' = female*agecat`i'
  gen bagecat`i' = bach*agecat`i'
  local ++i
}

* wave flags: 0/1 dummies for waves 6, 11 and 16
foreach w of numlist 6 11 16 {
  gen wave`w' = (wave==`w')
}

* average mobility rate for 5-year age group: mean of pmove5 within each ageg value, over all rows in the file
bysort ageg: egen meanmob5 = mean(pmove5)

* rate of errors (average over 3 waves)
* Sample definitions reused below:
*   full5 - wave 6, 11 or 16 with "XXXXX" in the five ivwptn positions ending at that wave
*   adult - full5 plus hgage>=19, hgint==1 and hhsm==1
* NOTE(review): hgage>=19 is also true when hgage is system missing - confirm hgage is never missing where hgint==1
local full5 `"((wave==11 & substr(ivwptn,7,5)=="XXXXX") | (wave==6 & substr(ivwptn,2,5)=="XXXXX") | (wave==16 & substr(ivwptn,12,5)=="XXXXX"))"'
local adult `"`full5' & hgage>=19 & hgint==1 & hhsm==1"'

tabulate error if `adult' & meanmob5!=.
bysort wave: tabulate error if `adult' & meanmob5!=.

* multinomial logit of errors, standard errors clustered on person
* tried
*  - fagecat5-fagecat18 but not significant
*  - bagecat* but not significant
*  - ed, only significantly different for uni degree
*  - agecat6-agecat18 in place of age30-age80; own (both left out of the specification below)
local rhs age30 age40 age50 age60 age70 age80 female bach mes nmes meanmob5 anniv5d anniv5d2 wave11 wave16
xi: mlogit error `rhs' if `adult', vce(cluster xwaveid)

* test coefficients: equality of the nmes and mes coefficients in the equation for error==2
test [Reported_move_but_no_move]nmes=[Reported_move_but_no_move]mes

* error distribution in the model sample, and again without the age and interview restrictions
tabulate error if `adult'
tabulate error if `full5' & hhsm==1

/*
* use gsem to apply the multinomial model allowing for random intercepts
xi: gsem (1.error <- age30 age40 age50 age60 age70 age80 /*agecat6-agecat18*/ female /*own*/ bach mes nmes meanmob5 anniv5d anniv5d2 wave11 wave16 M1[xwaveid])  ///
     (2.error <- age30 age40 age50 age60 age70 age80 /*agecat6-agecat18*/ female /*own*/ bach mes nmes meanmob5 anniv5d anniv5d2 wave11 wave16 M2[xwaveid])      ///
	 if ((wave==11 & substr(ivwptn,7,5)=="XXXXX") | (wave==6 & substr(ivwptn,2,5)=="XXXXX") | (wave==16 & substr(ivwptn,12,5)=="XXXXX")) & hgage>=19 & hgint==1 & hhsm==1, mlogit /*iterate(100)*/

xi: gsem (1.error <- /*age30_40 age50_60 age70_80*/ /*agecat6-agecat18*/ /*female*/ /*own*/ /*bach mes nmes meanmob5 anniv5d anniv5d2 wave11 wave16*/ M1[xwaveid])  ///
     (2.error <- /*age30_40 age50_60 age70_80*/ /*agecat6-agecat18*/ /*female*/ /*own*/ /*bach mes nmes meanmob5 anniv5d anniv5d2 wave11 wave16*/ M2[xwaveid])      ///
	 if ((wave==11 & substr(ivwptn,7,5)=="XXXXX") | (wave==6 & substr(ivwptn,2,5)=="XXXXX") | (wave==16 & substr(ivwptn,12,5)=="XXXXX")) & hgage>=19 & hgint==1 & hhsm==1, mlogit iterate(70)
*/
	 
	 
* Test whether temporary attachment to residence may lead to more errors - e.g. living apart together, commuters (i.e. has long commute - don't know distance commuted until W17),
* mining & construction (to pick up fly-in fly-out workers), people with holiday homes, labour hire workers, seasonal workers (identified from reason job ended)
 
* indicator for intimate relationship outside of dwelling (proxy for living apart together but will include couples who have not made decision to live apart) - from W5, 11 and 15 (make var relevant to W6, W11, W16)
* intrel = 1 when nrhave is 1 in the current or previous wave; 0 when neither is 1 but either is 2 or -1; missing otherwise
sort xwaveid wave
tabulate nrhave wave
gen nrhavem1 = L1.nrhave
gen intrel = cond(inlist(1, nrhave, nrhavem1), 1, cond(inlist(nrhave, 2, -1) | inlist(nrhavem1, 2, -1), 0, .))
tabulate intrel wave if inlist(wave, 6, 11, 16), missing column
* indicator for long commuters: lscom of 15 or more (original note: hours per week) in the current wave or any of the previous four
tabulate lscom wave if lscom>=15

* lags 1 to 4 of lscom
forvalues k = 1/4 {
  gen lscomm`k' = L`k'.lscom
}

* Build the two conditions across the five waves:
*   any15  - some wave has a non-missing value of 15 or more
*   anyobs - some wave has a non-missing value of 0 or more
local any15 0
local anyobs 0
foreach v in lscom lscomm1 lscomm2 lscomm3 lscomm4 {
  local any15  `any15' | (`v'>=15 & `v'!=.)
  local anyobs `anyobs' | (`v'>=0 & `v'!=.)
}
gen longcom = 1 if `any15'
replace longcom = 0 if longcom==. & (`anyobs')

tabulate wave longcom, missing
tabulate wave longcom, row
* indicators for mining (jbmi61==2) and construction (jbmi61==5) in any of 5 waves (current wave plus four lags)
tabulate jbmi61

* lags 1 to 4 of jbmi61
forvalues k = 1/4 {
  gen jbmi61m`k' = L`k'.jbmi61
}

* Conditions across the five waves:
*   inmining / inconstr - some wave has code 2 / code 5
*   known               - some wave has a non-missing value of -1 or more
local inmining 0
local inconstr 0
local known 0
foreach v in jbmi61 jbmi61m1 jbmi61m2 jbmi61m3 jbmi61m4 {
  local inmining `inmining' | (`v'==2)
  local inconstr `inconstr' | (`v'==5)
  local known    `known' | (`v'>=-1 & `v'!=.)
}

* mining: 1 if in mining in any wave, 0 if not but a value is known in at least one wave, missing otherwise
gen mining = 1 if `inmining'
replace mining = 0 if mining==. & (`known')
tabulate wave mining, missing
tabulate wave mining, row

* construct: same rule for construction
gen construct = 1 if `inconstr'
replace construct = 0 if construct==. & (`known')
tabulate wave construct, missing
tabulate wave construct, row
* indicator for holiday home - from W6, 10, 14 (make var relevant to W6, W11, W16)
tabulate opt2hnr wave
tabulate opt2hr wave

* lags 1 and 2 of both source items
foreach stub in opt2hnr opt2hr {
  forvalues k = 1/2 {
    gen `stub'm`k' = L`k'.`stub'
  }
}

* holhouse = 1 if either item is 1 in the current wave or in either of the two previous waves
gen holhouse = 1 if inlist(1, opt2hnr, opt2hr, opt2hnrm1, opt2hrm1, opt2hnrm2, opt2hrm2)

* holhouse = 0 if not already 1 and both items are 0 within at least one of those three waves
replace holhouse = 0 if holhouse==. & (   ///
      (opt2hnr==0   & opt2hr==0)          ///
    | (opt2hnrm1==0 & opt2hrm1==0)        ///
    | (opt2hnrm2==0 & opt2hrm2==0) )
tabulate wave holhouse, missing
* indicator for labour hire worker: jbmlh==1 in the current wave or any of the previous four
tabulate jbmlh

* lags 1 to 4 of jbmlh
forvalues k = 1/4 {
  gen jbmlhm`k' = L`k'.jbmlh
}

* Conditions across the five waves:
*   hired - some wave has jbmlh equal to 1
*   known - some wave has a non-missing value of -1 or more
local hired 0
local known 0
foreach v in jbmlh jbmlhm1 jbmlhm2 jbmlhm3 jbmlhm4 {
  local hired `hired' | (`v'==1)
  local known `known' | (`v'>=-1 & `v'!=.)
}
gen labourhire = 1 if `hired'
replace labourhire = 0 if labourhire==. & (`known')

tabulate wave labourhire, missing
tabulate wave labourhire, row
* indicator for seasonal worker (based on reason job ended)
/**/
* first add variables
* For waves 2 to maxwave, read the person id and five reason items from that wave's rperson file,
* strip the wave-letter prefix, stack all waves into the seasonal file in the working folder,
* then merge that file onto the analysis data by person and wave.
preserve
local alphabet abcdefghijklmnopqrstuvwxyz
local maxwave  16
forvalues wave=2 / `maxwave' {
  * wave letter: b for wave 2, c for wave 3, and so on
  local w=substr("`alphabet'",`wave',1)
  use xwaveid `w'pjljrea `w'pjorea `w'pjljr `w'pjotrea `w'ujljrea using "`hildadir'\rperson_`w'`maxwave'0c", clear
  renpfix `w'
  gen wave=`wave'
  * the id is converted to numeric to match the merge key in the analysis data
  destring xwaveid, replace
  * from wave 3 onwards, add the waves already stacked before re-saving
  if `wave'>2 {
    append using "`workdir'\seasonal"
  }
  save "`workdir'\seasonal", replace
}
restore
merge 1:1 xwaveid wave using "`workdir'\seasonal", nogenerate
* restore panel order explicitly so the lag operators below do not depend on the sort order left by the merge
sort xwaveid wave
/**/
* create indicator for current wave - whether had job end because it was temporary or seasonal (cannot identify seasonal on its own)
* 1 if any of the five reason items equals 1; otherwise 0 when hgint==1; missing otherwise
gen tempseasw=1 if inlist(1, pjljrea, pjorea, pjljr, pjotrea, ujljrea)
replace tempseasw=0 if tempseasw==. & hgint==1
tab wave tempseasw, miss
* create indicator for past 5 years (current wave plus four lags)
forvalues k = 1/4 {
  gen tempseaswm`k' = L`k'.tempseasw
}
* tempseas = 1 if the wave-level flag is 1 in any of the five waves; 0 if not but the flag is observed in at least one
local ended 0
local known 0
foreach v in tempseasw tempseaswm1 tempseaswm2 tempseaswm3 tempseaswm4 {
  local ended `ended' | (`v'==1)
  local known `known' | (`v'>=0 & `v'!=.)
}
gen tempseas=1 if `ended'
replace tempseas=0 if tempseas==. & (`known')
tab wave tempseas, miss
tab wave tempseas, row

* model error - with additional indicators for situations where there may be temporary attachment to dwelling
* Sample: wave 6, 11 or 16 with "XXXXX" in the five ivwptn positions ending at that wave, hgage>=19, hgint==1, hhsm==1
* NOTE(review): hgage>=19 is also true when hgage is system missing - confirm hgage is never missing where hgint==1
local full5 `"((wave==11 & substr(ivwptn,7,5)=="XXXXX") | (wave==6 & substr(ivwptn,2,5)=="XXXXX") | (wave==16 & substr(ivwptn,12,5)=="XXXXX"))"'
local adult `"`full5' & hgage>=19 & hgint==1 & hhsm==1"'

* Covariates placed before and after the attachment indicators
* (agecat6-agecat18 and own were tried earlier and are left out)
local demog  age30 age40 age50 age60 age70 age80 female bach mes nmes
local timing meanmob5 anniv5d anniv5d2 wave11 wave16

* each indicator separately
foreach ind in intrel longcom mining construct holhouse labourhire tempseas {
  xi: mlogit error `demog' `ind' `timing' if `adult', cluster(xwaveid)
}

* all indicators
xi: mlogit error `demog' intrel longcom mining construct holhouse labourhire tempseas `timing' if `adult', cluster(xwaveid)

* most relevant indicators
xi: mlogit error `demog' mining holhouse `timing' if `adult', cluster(xwaveid)

log close
